// ===================================================================
//
// Data for validation figure on incumbent innovation across gaps
//
// ===================================================================

* pat63_02f.dta contains the updated data from the NBER Patent Data Project.
* It can be downloaded from http://elsa.berkeley.edu/~bhhall/patents.html

// ---- US side: assignee rows by application year x IPC4 class -----------
// Writes patUS.dta with one row per (appyear, ipc4, assignee), carrying two
// cell-level variables:
//   kus      = number of US records in the (appyear, ipc4) cell
//   entry_us = number of records in the cell that are the first record of
//              their assignee within that IPC4 class
// The first record of every (assignee, ipc4) pair is removed before the
// file is written, so an assignee shows up in a cell only if it has a
// record there other than its first one in that class. Cells made up only
// of first records leave no row in patUS.dta.
use "pat63_02f.dta", clear
keep if country == "US"
drop if assignee == 0
drop if appyear == .
drop if ipc4 == ""

// Cell size is taken before any first-record rows are removed.
bysort appyear ipc4: gen kus = _N

// Flag the earliest record of each assignee within an IPC4 class. The
// within-group order is stated explicitly with (appyear) so the flag does
// not rely on whatever order an earlier sort happened to leave behind.
bysort assignee ipc4 (appyear): gen vec_ent = 1 if _n == 1

// egen total() treats the missing (unflagged) rows as zero.
bysort appyear ipc4: egen entry_us = total(vec_ent)

// Remove exactly the rows that were counted in entry_us. Dropping by the
// flag (instead of re-sorting and dropping _n == 1 again) guarantees that
// the counted row and the removed row are the same observation.
drop if vec_ent == 1

// kus and entry_us are constant within a cell, so any row of an
// (appyear, ipc4, assignee) group can stand in for the group.
bysort appyear ipc4 assignee: keep if _n == 1
keep appyear ipc4 kus assignee entry_us
sort appyear ipc4
save patUS.dta, replace

// ---- Foreign side: record counts by application year x IPC4 class ------
// Leaves in memory one row per (appyear, ipc4) with
//   kfrgn = number of records from JP, GB, DE, FR, CA and IT in the cell
//   vecUS = 0, a placeholder that later code switches to 1 for cells that
//           have US records
use "pat63_02f.dta", clear
drop if assignee == 0 | appyear == . | country == "" | ipc4 == ""
keep if inlist(country, "JP", "GB", "DE", "FR", "CA", "IT")
keep appyear ipc4

// Count the records in each cell, then reduce the cell to a single row.
bysort appyear ipc4: gen kfrgn = _N
by appyear ipc4: keep if _n == 1

gen vecUS = 0
sort appyear ipc4


// ---- Attach the US assignee rows to the foreign cell counts ------------
// The data in memory have one row per cell; patUS.dta has one row per
// assignee within a cell, hence 1:m. The scratch file is removed afterwards.
merge 1:m appyear ipc4 using patUS.dta
erase patUS.dta

// Restrict the sample to application years 1975 through 1995.
keep if inrange(appyear, 1975, 1995)

// Cells that exist on only one side of the merge have a missing count on
// the other side. Foreign counts become 0; US counts become the placeholder
// 0.1, which keeps the ratio below defined and is reset to 0 further down.
replace kfrgn = 0   if missing(kfrgn)
replace kus   = 0.1 if missing(kus)

// Foreign-to-US ratio and foreign share of the cell total. When there are
// no foreign records and kus holds the 0.1 placeholder, the values are set
// to 1 and .5 respectively.
// NOTE(review): after an unmatched merge a row has either kfrgn or kus
// missing, not both, so that special case looks unreachable -- confirm.
gen frgn_us  = cond(kfrgn == 0 & kus > .09 & kus < .11, 1,  kfrgn/kus)
gen frgn_tot = cond(kfrgn == 0 & kus > .09 & kus < .11, .5, kfrgn/(kfrgn+kus))


//------------------ IPC sectors and cell-level firm counts ---------------

drop _merge

// Turn the 0.1 placeholder for "no US rows in this cell" back into 0.
replace kus = 0 if kus > .09 & kus < .11

// Keep classes whose code starts with a capital letter; that first
// character is the IPC sector, and id_sector numbers the sectors.
keep if regexm(ipc4, "^[A-Z]")
gen ipc_sector = substr(ipc4, 1, 1)
egen id_sector = group(ipc_sector)

// vecUS marks rows belonging to cells that have US records.
replace vecUS = 1 if kus > 0

// kusf0: number of rows in the cell with assignee code 0 (times vecUS).
// NOTE(review): rows with assignee == 0 were dropped when both input
// extracts were built, so this is expected to be 0 everywhere -- confirm.
bysort appyear ipc4: egen kusf0 = total((assignee == 0) * vecUS)

// kusf: number of rows in the cell when it has US records, otherwise 0.
// Presumably each such row is one US assignee from patUS.dta -- verify.
bysort appyear ipc4: gen kusf = _N * vecUS

// One row per cell; the variables kept below are constant within a cell.
bysort appyear ipc4: keep if _n == 1
keep appyear kfrgn kus kusf kusf0 entry_us

// Discard cells with fewer than 10 records in total (US plus foreign).
gen ktot = kus + kfrgn
drop if ktot < 10

// ---- Bin cells by the US share of records and average within bins ------

// us_rat is the US share of the cell in percent. Bins are 100/33 percentage
// points wide: a share of exactly 0 falls in bin 0, positive shares fall in
// bins 1 to 33.
gen us_rat = 100*kus/ktot
sort us_rat
gen bin = ceil(us_rat/(100/33))

// N_bin is the number of cells in the bin; freq is the bin's share of all
// cells, expressed in percent and rounded to three decimals.
bysort bin: gen N_bin = _N
gen freq = N_bin/_N
replace freq = round(10^5*freq)/10^3

// Bin totals: records net of first-time records (kus - entry_us), firm
// counts (kusf) and the assignee-0 counts (kusf0).
bysort bin: egen kus_tot   = total(kus-entry_us)
bysort bin: egen kusf_tot  = total(kusf)
bysort bin: egen kusf0_tot = total(kusf0)

// Average number of non-entry records per firm in the bin; this is missing
// when the bin has no firms (kusf_tot == 0).
gen kus_mean = kus_tot/kusf_tot

// One row per bin. tot adds up the bin percentages (presumably a check that
// they sum to about 100).
bysort bin: keep if _n == 1
egen tot = total(freq)

// Re-centre the bin index so that bin 17 (US share of roughly 48.5 to 51.5
// percent) becomes 0.
gen bin2 = bin - 17

// Connected-line plot of the bin averages (kus_mean) against the re-centred
// bin index (bin2), stored in memory under the graph name gr6.
twoway connected kus_mean bin2,                                                  ///
	   lcolor(black) lpattern(solid) clwidth(medthick)                           ///
	   mcolor(black)                                                             ///
	   ytitle("{stSerif:Patent application per firm, avg.}",size(medlarge))      ///
	   ylabel(1(1)4,angle(0))                                                    ///
	   xtitle({stSerif:Gaps},size(medlarge))                                     ///
	   xlabel(-15(5)15)                                                          ///
	   graphregion(color(white))                                                 ///
	   name(gr6,replace)
